##
## FIGURE - MAP: DISTRIBUTION OF DLS WORLDWIDE 
##

rm(list=ls())

##
## LOAD PACKAGES ---------------------------------------------------------
## 

library(tidyverse)
library(countrycode)
library(sf)
library(RColorBrewer)
library(ggpubr)
library(svglite)
library(EnvStats) # To add sample size

##
## SET WORKING DIRECTORY
##

#setwd("")

##
## LOAD DATA  ------------------------------------------------------------
## 

#> keep only the most relevant info in the main data
load("Complete DLS data file_regional level.RData")

#> adding information about world regions

dls_region$worldregion23 <- countrycode(dls_region$country_name, origin = "country.name", destination = "region23" )
table(dls_region$worldregion23 )

#> keeping only cases in the data for the last available year
aux1 <- dls_region %>% 
  group_by(region_num_raw,
  country_name,
  worldregion23) %>% 
  mutate(wave_max = max(wave)) %>% 
  ungroup() %>% 
  filter(wave == wave_max)

table(aux1$dls_min7_region)

#> Creating a new categorical variable based on a continuous variable
#> which we want to plot in the map. This we do with cuts command


aux1<- aux1 %>% 
  mutate(dls_min7_region_cat = cut(dls_min7_region,
                             breaks = c(-Inf,0.05, 0.1, 0.15, 0.2,0.3,0.4, 0.5, 0.6, 0.7,0.8, Inf),
                                            labels = c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11"))) 
table(aux1$dls_min7_region_cat)

#> Get world map
map.world <- map_data('world') %>% 
  rename("Country"="region")  

#> Load DHS shapefile
geo_shp <- read_sf("dhs_shapefile.gpkg")
class(geo_shp)
table(geo_shp$CountryName)

#> GEOLEVEL is the main identfier. Here I make sure it's numeric
geo_shp <- geo_shp %>% 
  left_join(aux1, by=c("CountryName" = "country_name",
                       "RegionId" = "region_num_raw"))

table(geo_shp$dls_min7_region, useNA = "always")
class(geo_shp$dls_min7_region)

#> Removing cases with missing information from dataset (mainly Paraguay)
d.check <- geo_shp %>% filter(is.na(dls_min7_region))
geo_shp <- geo_shp %>% filter(CountryName != "Paraguay")

#> Defining color palette and plot
mycolors <- colorRampPalette(brewer.pal(9, "Blues"))(11)

g1 <- ggplot() +
  geom_polygon(data=map.world, 
               aes( x = long, y = lat, group = group), 
               fill="#dedede")+
  geom_sf(data=geo_shp, 
          aes(fill=dls_min7_region_cat))+
  theme_light()+
  theme(axis.title = element_blank(),
        axis.text=element_text(size=12),
        legend.position="bottom",
        legend.title = element_text(size=13))+
  coord_sf(ylim = c(-55,80),xlim=c(-115,153))+
  guides(fill = guide_legend(title.position = "top", title.hjust=0.5,
                             nrow=1, 
                             label.position = "bottom",
                             label.theme = element_text(size=11),
                             keywidth = 2.5))+
  scale_fill_manual(name="% population with access to > 2/3 DLS services",
                    values=mycolors, 
                    labels=c("  <5%", "6-10%","11-15%", "15-20%", 
                             "21-30%", "31-40%", "41-50%",
                             "51-60%", "61-70%", "71-80%", 
                             ">80%  "),
                    na.translate = F)+
  ggtitle("")

#g1

aux1$iso3c <- countrycode(aux1$country_name, origin = "country.name", destination = "iso3c" )

g2 <- aux1 %>% 
  group_by(iso3c) %>% 
  mutate(dls_min7_region_median = median(dls_min7_region, na.rm=T)) %>% 
  ungroup() %>% 
  ggplot(aes(x=fct_reorder(iso3c,-dls_min7_region_median), y=dls_min7_region)) +
  geom_boxplot(fill= "#e6f0ef", outlier.size = 2,outlier.shape = 4,)+
  ylab("% population with access to > 2/3 DLS services")+xlab("")+
  scale_y_continuous(labels=scales::percent)+
  theme_light()+
  ggtitle("")+
  theme(axis.text=element_text(size=12),
        axis.text.x = element_text(angle = 60, vjust=1, hjust=1, size=11),
        axis.title=element_text(size=13))+
  stat_n_text(size = 3.5, angle = 90) #Add Sample Size
#g2


#> How many regions where less than 1% of households has achieved all DLS
aux1 <- aux1 %>% 
  mutate(dls_min10_region_binary = ifelse(dls_min10_region<0.01,0,
                                          ifelse(dls_min10_region>=0.01,1,NA)))
table(aux1$dls_min10_region_binary, useNA = "always")
prop.table(table(aux1$dls_min10_region_binary))

g3 <- aux1 %>% 
  gather(dls_min5_region, dls_min7_region, dls_min10_region, key="threshold", value="share") %>% 
  mutate(order=recode(threshold, 
                      dls_min5_region = 3L,
                      dls_min7_region=2L,
                      dls_min10_region=1L),
         threshold=recode(threshold,
                          dls_min5_region="> 1/2 of DLS dimensions",
                          dls_min7_region="> 2/3 of DLS dimensions",
                          dls_min10_region="All DLS dimensions")) %>% 
  ggplot() + 
  geom_histogram(aes(x=share, fill=threshold, color=threshold),binwidth=0.1,alpha=0.8)+
  facet_wrap(facets = ~fct_reorder(threshold, order), nrow=1)+
  theme_bw()+
  scale_fill_manual(name="",
                     values=c("#e6f0ef", "#e6f0ef",  "#e6f0ef"))+
  scale_color_manual(name="",
                    values=c("#141414",  "#141414", "#141414"))+
  ylab("Number of regions")+xlab("% population exceeding the threshold in each subnational region")+
  scale_x_continuous(labels=scales::percent)+
  theme(legend.position = "none",
        strip.text = element_text(face = "bold", size=11),
        strip.background = element_rect(fill = "White"),
        axis.text=element_text(size=12),
        axis.title=element_text(size=13))+
  ggtitle("")
#g3

#g12

g123 <- ggarrange(g3, g1, g2, nrow=3, labels = c("A", "B", "C"),
                 heights=c(1.2,2,1.5),
                 widths=c(1, 1,1.2),
                 hjust=c(0,0,0),
                 vjust=c(1,1,1),
                 font.label=list(size=25))

ggsave(plot = g123,
       filename = "figure 1.png", 
       width=12, height=15.8, dpi = 300)

ggsave(plot = g123,
       filename = "figure 1.svg", 
       width=12, height=16.5, dpi = 300)



